## This file prepares Thomas Holmes' State Border Data Set for use in my analyses, ##
## data download from: https://users.econ.umn.edu/~holmes/data/BorderData.html ##
## Created by Meredith Dost and last run 8/18/2025 ##

# set working directory
#setwd("other_data/border_distance")

# Helper operator: the negation of `%in%`, i.e. TRUE where the left-hand value
# is NOT found in the right-hand set.
`%!in%` <- Negate(`%in%`)

# Read in Holmes' county-distance data.
# NOTE(review): this path is relative to the working directory, while other
# reads in this script use paths that begin with "other_data/" or
# "voting_data/" -- confirm which directory the script is meant to run from.
dist <- read.csv("cntydist_holmes.csv")

# Create a numeric county FIPS code: state code * 1000 + county code.
dist$fips <- dist$ST * 1000 + dist$COUNTY

# De-duplicate so the unit of analysis is the county rather than the county
# pair: every county that appears on more than one row is collapsed to a single
# row holding its smallest MINDIST.
dist_merge_fips <- unique(dist$fips[duplicated(dist$fips)])

# ST1/ST2 for a collapsed county come from its first row in the file.
first_row <- match(dist_merge_fips, dist$fips)
st1 <- dist$ST1[first_row]
st2 <- dist$ST2[first_row]

# Smallest MINDIST per duplicated county. vapply() returns a zero-length vector
# when there are no duplicated counties, so that case no longer errors the way
# a `1:length(...)` loop does. MINDIST is expected to be numeric.
dists_of_dups <- vapply(
  dist_merge_fips,
  function(f) min(dist$MINDIST[which(dist$fips == f)]),
  numeric(1)
)

# Counties that appear exactly once are kept as they are ...
dist_merge_nodups <- dist[
  dist$fips %!in% dist_merge_fips,
  c("fips", "MINDIST", "ST1", "ST2")
]
# ... and the collapsed counties are appended after them.
dist_merge_dups <- data.frame(
  fips = dist_merge_fips,
  MINDIST = dists_of_dups,
  ST1 = st1,
  ST2 = st2
)
dist_merge_combined <- rbind.data.frame(dist_merge_nodups, dist_merge_dups)
rm(dist_merge_dups, dist_merge_nodups, dist_merge_fips, dists_of_dups,
   st1, st2, first_row)

# Keep only the variables of interest.
dist <- dist_merge_combined[, c("fips", "MINDIST")]

# Read another dataset used in the analyses to get its full list of county
# FIPS codes, then record which of those codes have no row in `dist`.
#setwd("")
vote_share <- read.csv("voting_data/demvoteshare_by_county.csv")
fips.data <- unique(vote_share$fips)
has_distance <- fips.data %in% unique(dist$fips)
fips.mindist.na <- fips.data[!has_distance]
# The full voting table is not needed beyond this point.
rm(vote_share, has_distance)

## fixing "missing" counties per Holmes
# mcenreis.csv supplies, per row, a state code (ST), a Census county code
# (CENCNTY) and a REIS county code (REISCNTY). Each county that is missing from
# `dist` is given the MINDIST of the REIS county it is paired with.
# NOTE(review): assumes the county codes in `dist` are REIS codes -- confirm
# against Holmes' documentation.
fix <- read.csv("other_data/border_distance/mcenreis.csv")

# Turn both county codes into full FIPS-style codes (state * 1000 + county).
fix$CENCNTY <- fix$ST * 1000 + fix$CENCNTY
fix$REISCNTY <- fix$ST * 1000 + fix$REISCNTY

# Relabel the crosswalk rows for Census codes 8013 and 12025 as 8014 and 12086,
# so that 8014 and 12086 are paired with the REIS county those rows point to.
# (An earlier comment here described this the other way round; the code is what
# is documented now.)
fix$CENCNTY[fix$CENCNTY == 8013] <- 8014
fix$CENCNTY[fix$CENCNTY == 12025] <- 12086

# Keep only crosswalk rows for counties missing from `dist`, excluding state
# code 15 (presumably Hawaii, if ST is the state FIPS code).
fix <- fix[fix$CENCNTY %in% fips.mindist.na & fix$ST != 15, ]

# Select and rename columns by name rather than by position, so the result
# does not depend on the column order of the CSV.
fix <- fix[, c("CENCNTY", "REISCNTY")]
names(fix)[names(fix) == "REISCNTY"] <- "fips"

# Look up each REIS county's distance, then label the row with the Census code
# of the county that was missing.
dist.fix <- merge(dist, fix, by = "fips")
dist.fix$fips <- dist.fix$CENCNTY
dist.fix <- dist.fix[, c("fips", "MINDIST")]

# Append the recovered counties to the de-duplicated distance data.
dist.final <- rbind.data.frame(dist, dist.fix)

## save out
# Writes one row per county without R's row names. The file goes to the current
# working directory because the setwd() call below is commented out.
#setwd("other_data/")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
write.csv(dist.final, "border_distance.csv", row.names = FALSE)
